#!/usr/bin/env python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
import re
import string

import slugify

# order is important
_licenses = {'MIT': ['Permission is hereby granted free of charge', 'The above copyright notice and this permission notice shall'],
             'BSD-3': ['Redistributions of source code must retain the above copyright', 'Redistributions in binary form must reproduce the above copyright', 'specific prior written permission'],
             'BSD-2': ['Redistributions of source code must retain the above copyright', 'Redistributions in binary form must reproduce the above copyright'],
             'AL': ['http://www.apache.org/licenses/LICENSE-2.0']}


def get_notices(license_path="../LICENSE"):
    """Collect the third-party notices listed in a LICENSE file.

    A notice is any text of the form ``(<license type>) <project> (<url>)``
    where the URL starts with ``http``.

    :param license_path: path of the file to scan; defaults to the
        ``../LICENSE`` file relative to the current working directory.
    :return: one entry per line that contains at least one notice. Each entry
        is the ``re.findall`` result for that line, i.e. a list of
        ``(license_type, project, url)`` tuples. Lines without a notice are
        skipped.
    """
    notice_pattern = re.compile(r"\((.+?)\) (.+?) \((http.+?)\)")

    # The context manager closes the file once every line has been scanned.
    with open(license_path, "r") as license_file:
        matches_per_line = (notice_pattern.findall(line) for line in license_file)
        return [matches for matches in matches_per_line if matches]


def _normalize_license_text(text):
    """Lower-case *text*, drop ASCII punctuation and collapse whitespace."""
    punctuation = "[{}]".format(re.escape(string.punctuation))
    return " ".join(re.sub(punctuation, "", text.lower()).split())


def parse_license_file(project_name):
    """Detect the license type of a project from its bundled license file.

    The leading ``[a-z0-9-]`` run of the lower-cased project name is slugified
    and used to locate ``../licenses/LICENSE-<slug>.txt``. The file content is
    then compared against the phrase lists in ``_licenses``.

    :param project_name: project name as written in the LICENSE notice.
    :return: the ``_licenses`` key whose phrases all occur in the license
        file, or ``False`` when the name cannot be parsed, the file does not
        exist or no license type matches.
    """
    prefix = re.match(r"^[a-z0-9\-]+", project_name.lower())
    if prefix is None:
        # Name starts with a character outside [a-z0-9-]: no file to look up.
        return False

    name = slugify.slugify(prefix.group(0))
    path = "../licenses/LICENSE-{}.txt".format(name)
    if not os.path.exists(path):
        return False

    with open(path) as license_file:
        data = _normalize_license_text(license_file.read())

    # Check the signatures with the most phrases first (BSD-3 contains every
    # BSD-2 phrase) so the result does not depend on dict iteration order.
    # sorted() is stable, so equally sized signatures keep their dict order.
    by_specificity = sorted(_licenses, key=lambda k: len(_licenses[k]), reverse=True)
    for license_type in by_specificity:
        # Phrases get the same normalization as the file text; otherwise a
        # phrase containing punctuation (the Apache URL) could never match.
        if all(_normalize_license_text(phrase) in data
               for phrase in _licenses[license_type]):
            return license_type

    return False


if __name__ == "__main__":
    # One layout shared by the header and every report row.
    row_layout = "{:<30}|{:<50}||{:<20}||{:<10}"
    print(row_layout.format("PROJECT", "URL", "LICENSE TYPE DEFINED", "DETECTED"))

    notices = get_notices()

    for line_matches in notices:
        # Only the first notice found on a line is reported.
        defined_type, project, url = line_matches[0]
        detected = parse_license_file(project)
        print(row_layout.format(project, url[:50], defined_type, detected))

    # Compare the number of declared notices with the number of bundled files.
    file_count = len(os.listdir("../licenses"))
    print("Defined licenses: {} Files found: {}".format(len(notices), file_count))
